library(ggplot2)
library(plotly)
library(splines)
# Simulate a sample from the linear model Y = 1.2 * X + epsilon.
# The generator is seeded so that the sample, and every model and plot built
# from it, is reproducible between runs (the later sections of this script
# already seed their own data).
set.seed(0)
# Number of observations
n <- 1000
# Independent variable: uniform draws between 0 and 10
X <- runif(n, min = 0, max = 10)
# Error term: standard normal noise
epsilon <- rnorm(n, mean = 0, sd = 1)
# Dependent variable from the true linear relationship
Y <- 1.2 * X + epsilon
# data.frame() builds the named numeric columns directly, without the
# intermediate matrix that as.data.frame(cbind(...)) creates.
dataframe_origin <- data.frame(Y, X, epsilon)
# Visual check: scatter of the simulated data with the least-squares line
ggplot(dataframe_origin, aes(x = X, y = Y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle(expression("y = 1.2x" + epsilon)) +
  theme(plot.title = element_text(hjust = 0.5, size = 14))
# Fit the simple linear model and keep its residuals and fitted values
model1 <- lm(Y ~ X, data = dataframe_origin)
res <- residuals(model1)
fitted_values <- fitted(model1)
# Residuals against fitted values, with a red reference line at zero
ggplot(dataframe_origin, aes(x = fitted_values, y = res)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  labs(
    title = "Residual Plot",
    x = "Predicted Model Value",
    y = "Residuals"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Quadratic case: the response gains an X^2 term on top of the linear model
x_square <- X^2
Y_new <- Y + x_square
dataframe_new <- cbind(dataframe_origin, x_square, Y_new)
# Misspecified fit: only the linear term is offered to the model
model2 <- lm(Y_new ~ X, data = dataframe_new)
# Residuals against fitted values for the misspecified fit
ggplot(dataframe_new, aes(x = fitted(model2), y = residuals(model2))) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  labs(
    title = "Residual Plot",
    x = "Predicted Model Value",
    y = "Residuals"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Fit again with the quadratic term included
model_3D_square <- lm(Y_new ~ X + x_square, data = dataframe_new)
# 3D residual plot: residuals over the two regressors X and X^2
plot_ly(
  data = dataframe_new,
  x = ~X,
  y = ~x_square,
  z = residuals(model_3D_square),
  type = "scatter3d",
  mode = "markers"
) %>%
  layout(
    title = "3D Residual Plot",
    scene = list(
      xaxis = list(title = "X"),
      yaxis = list(title = "X^2"),
      zaxis = list(title = "Residuals")
    )
  )
# Cubic case: the response gains an X^3 term on top of the linear model
x_cubic <- X^3
Y_cubic <- Y + x_cubic
dataframe_new <- cbind(dataframe_origin, x_cubic, Y_cubic)
# Misspecified fit: only the linear term is offered to the model
model3 <- lm(Y_cubic ~ X, data = dataframe_new)
# Residuals against fitted values for the misspecified fit
ggplot(dataframe_new, aes(x = fitted(model3), y = residuals(model3))) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  ggtitle("Residual Plot") +
  xlab("Predicted Model Value") +
  ylab("Residuals") +
  theme(plot.title = element_text(hjust = 0.5))
# Fit again with the cubic term included. The response must be Y_cubic:
# the previous code regressed Y_new (the quadratic response, silently picked
# up from the global environment because dataframe_new has no such column),
# so the 3D plot showed residuals of the wrong model.
model_3D_cubic <- lm(Y_cubic ~ X + x_cubic, data = dataframe_new)
# 3D residual plot: residuals over the two regressors X and X^3
plot_ly(
  data = dataframe_new,
  x = ~X,
  y = ~x_cubic,
  z = residuals(model_3D_cubic),
  type = "scatter3d",
  mode = "markers"
) %>%
  layout(
    title = "3D Residual Plot",
    scene = list(
      xaxis = list(title = "X"),
      yaxis = list(title = "X^3"),
      zaxis = list(title = "Residuals")
    )
  )
# Mixed case: the response gains both an X^2 and an X^3 term
Y_mix <- Y + x_cubic + x_square
# Keep both extra regressors in the data frame so the model formula and the
# plotly formulas below resolve their variables from the data rather than
# from whatever happens to be in the global environment.
dataframe_new <- cbind(dataframe_origin, x_square, x_cubic, Y_mix)
# Misspecified fit: only the linear term is offered to the model
model_mix <- lm(Y_mix ~ X, data = dataframe_new)
# Residuals against fitted values for the misspecified fit
ggplot(dataframe_new, aes(x = fitted(model_mix), y = residuals(model_mix))) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  ggtitle("Residual Plot") +
  xlab("Predicted Model Value") +
  ylab("Residuals") +
  theme(plot.title = element_text(hjust = 0.5))
# Fit again with both polynomial terms included. The response must be Y_mix:
# the previous code regressed Y_new (the quadratic-only response), so the 3D
# plot showed residuals of a model for the wrong response.
model_3D_mix <- lm(Y_mix ~ X + x_square + x_cubic, data = dataframe_new)
# 3D residual plot: residuals over X and X^2
plot_ly(
  data = dataframe_new,
  x = ~X,
  y = ~x_square,
  z = residuals(model_3D_mix),
  type = "scatter3d",
  mode = "markers"
) %>%
  layout(
    title = "3D Residual Plot",
    scene = list(
      xaxis = list(title = "X"),
      yaxis = list(title = "X^2"),
      zaxis = list(title = "Residuals")
    )
  )
# Quartic case: the response gains an X^4 term on top of the linear model
x_qudra <- X^4
Y_qudra <- Y + x_qudra
dataframe_new <- cbind(dataframe_origin, x_qudra, Y_qudra)
# Misspecified fit: only the linear term is offered to the model
model4 <- lm(Y_qudra ~ X, data = dataframe_new)
# Residuals against fitted values for the misspecified fit
ggplot(dataframe_new, aes(x = fitted(model4), y = residuals(model4))) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  ggtitle("Residual Plot") +
  xlab("Predicted Model Value") +
  ylab("Residuals") +
  theme(plot.title = element_text(hjust = 0.5))
# Fit again with the quartic term included. The response must be Y_qudra:
# the previous code regressed Y_new (the quadratic response, taken from the
# global environment), so the 3D plot showed residuals of the wrong model.
model_3D_qudra <- lm(Y_qudra ~ X + x_qudra, data = dataframe_new)
# 3D residual plot: residuals over the two regressors X and X^4
plot_ly(
  data = dataframe_new,
  x = ~X,
  y = ~x_qudra,
  z = residuals(model_3D_qudra),
  type = "scatter3d",
  mode = "markers"
) %>%
  layout(
    title = "3D Residual Plot",
    scene = list(
      xaxis = list(title = "X"),
      yaxis = list(title = "X^4"),
      zaxis = list(title = "Residuals")
    )
  )
# High-power case: the response gains an X^100 term on top of the linear model
x_power <- X^100
Y_power <- Y + x_power
dataframe_new <- cbind(dataframe_origin, x_power, Y_power)
# Misspecified fit: only the linear term is offered to the model
model5 <- lm(Y_power ~ X, data = dataframe_new)
# Residuals against fitted values for the misspecified fit
ggplot(dataframe_new, aes(x = fitted(model5), y = residuals(model5))) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  ggtitle("Residual Plot") +
  xlab("Predicted Model Value") +
  ylab("Residuals") +
  theme(plot.title = element_text(hjust = 0.5))
# Fit again with the X^100 term included. The response must be Y_power:
# the previous code regressed Y_new (the quadratic response, taken from the
# global environment), so the 3D plot showed residuals of the wrong model.
model_3D_power <- lm(Y_power ~ X + x_power, data = dataframe_new)
# 3D residual plot: residuals over the two regressors X and X^100
plot_ly(
  data = dataframe_new,
  x = ~X,
  y = ~x_power,
  z = residuals(model_3D_power),
  type = "scatter3d",
  mode = "markers"
) %>%
  layout(
    title = "3D Residual Plot",
    scene = list(
      xaxis = list(title = "X"),
      yaxis = list(title = "X^100"),
      zaxis = list(title = "Residuals")
    )
  )
# Logarithmic case: the response gains a log(X) term on top of the linear model
x_log <- log(X)
Y_log <- Y + x_log
dataframe_new <- cbind(dataframe_origin, x_log, Y_log)
# Misspecified fit: only the linear term is offered to the model
model_log <- lm(Y_log ~ X, data = dataframe_new)
# Residuals against fitted values for the misspecified fit
ggplot(dataframe_new, aes(x = fitted(model_log), y = residuals(model_log))) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  labs(
    title = "Residual Plot",
    x = "Predicted Model Value",
    y = "Residuals"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Fit again with the log term included
model_3D_log <- lm(Y_log ~ X + x_log, data = dataframe_new)
# 3D residual plot: residuals over the two regressors X and log(X)
plot_ly(
  data = dataframe_new,
  x = ~X,
  y = ~x_log,
  z = residuals(model_3D_log),
  type = "scatter3d",
  mode = "markers"
) %>%
  layout(
    title = "3D Residual Plot",
    scene = list(
      xaxis = list(title = "X"),
      yaxis = list(title = "log(x)"),
      zaxis = list(title = "Residuals")
    )
  )
# Exponential case: the response gains an exp(X) term on top of the linear model
x_exp <- exp(X)
Y_exp <- Y + x_exp
dataframe_new <- cbind(dataframe_origin, x_exp, Y_exp)
# Misspecified fit: only the linear term is offered to the model
model_exp <- lm(Y_exp ~ X, data = dataframe_new)
# Residuals against fitted values for the misspecified fit
ggplot(dataframe_new, aes(x = fitted(model_exp), y = residuals(model_exp))) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  labs(
    title = "Residual Plot",
    x = "Predicted Model Value",
    y = "Residuals"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Fit again with the exponential term included
model_3D_exp <- lm(Y_exp ~ X + x_exp, data = dataframe_new)
# 3D residual plot: residuals over the two regressors X and exp(X)
plot_ly(
  data = dataframe_new,
  x = ~X,
  y = ~x_exp,
  z = residuals(model_3D_exp),
  type = "scatter3d",
  mode = "markers"
) %>%
  layout(
    title = "3D Residual Plot",
    scene = list(
      xaxis = list(title = "X"),
      yaxis = list(title = "exp(x)"),
      zaxis = list(title = "Residuals")
    )
  )
# Load necessary library
library(ggplot2)
# Synthetic data: 100 uniform points on (0, 10) with a quadratic response
# (2.5 * X^2) plus normal noise with standard deviation 2
set.seed(0)
X <- runif(100, min = 0, max = 10)
y <- 2.5 * X^2 + rnorm(100, mean = 0, sd = 2)
# Straight-line fit to the quadratic data
model <- lm(y ~ X)
# Predictions at the observed X values
y_pred <- predict(model, newdata = data.frame(X = X))
# Constrained predictions: cap at 8 where X < 3, floor at 4 where X > 7,
# and leave the predictions in between untouched
is_low <- X < 3
is_high <- X > 7
y_pred_max_min <- y_pred
y_pred_max_min[is_low] <- pmin(8, y_pred[is_low])
y_pred_max_min[is_high] <- pmax(4, y_pred[is_high])
# Residuals of the plain and of the constrained predictions
residuals_original <- y - y_pred
residuals_constrained <- y - y_pred_max_min
# Everything the plots need, gathered in one data frame
plot_data <- data.frame(
  X,
  y,
  y_pred,
  y_pred_max_min,
  residuals_original,
  residuals_constrained
)
# Data with the plain and the constrained regression lines overlaid
ggplot(plot_data, aes(x = X, y = y)) +
  geom_point(aes(color = "Data points")) +
  geom_line(aes(y = y_pred, color = "Linear Regression")) +
  geom_line(
    aes(y = y_pred_max_min, color = "Max/Min Constrained Regression"),
    linetype = "dashed"
  ) +
  labs(
    title = "Linear Regression with Max/Min Constraints",
    x = "X",
    y = "Y"
  ) +
  scale_color_manual(values = c("blue", "green", "red")) +
  theme_minimal()
# Residuals of the unconstrained fit against X
ggplot(plot_data, aes(x = X, y = residuals_original)) +
  geom_point(color = "blue") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(
    title = "Residuals of Linear Regression",
    x = "X",
    y = "Residuals"
  ) +
  theme_minimal()
# Residuals of the constrained predictions against X
ggplot(plot_data, aes(x = X, y = residuals_constrained)) +
  geom_point(color = "red") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(
    title = "Residuals of Max/Min Constrained Regression",
    x = "X",
    y = "Residuals"
  ) +
  theme_minimal()
# Synthetic data: a sine curve over 1..100 with normal noise (sd 0.2)
set.seed(0)
X <- 1:100
y <- sin(X / 10) + rnorm(100, sd = 0.2)
# Cubic B-spline regression with interior knots at 30, 60 and 90
model_sin <- lm(y ~ bs(X, degree = 3, knots = c(30, 60, 90)))
# Predictions on the same grid the model was fitted on
X_pred <- seq_len(100)
y_pred <- predict(model_sin, newdata = data.frame(X = X_pred))
# Residuals of the spline fit
residuals_spline <- y - y_pred
# Data frame for the residual plot
plot_data_spline <- data.frame(X = X, residuals_spline = residuals_spline)
# Data frame for the fit plot
plot_data <- data.frame(X = X, y = y, y_pred = y_pred)
# Observed points with the fitted spline curve
ggplot(plot_data, aes(x = X, y = y)) +
  geom_point(color = "blue", size = 2) +
  geom_line(aes(y = y_pred), color = "red") +
  labs(
    title = "Spline Regression Example",
    x = "X",
    y = "Y"
  ) +
  theme_minimal()
# Residuals of the spline model against X
ggplot(plot_data_spline, aes(x = X, y = residuals_spline)) +
  geom_point(color = "red") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(
    title = "Residuals of Spline Regression Model",
    x = "X",
    y = "Residuals"
  ) +
  theme_minimal()
# Synthetic data that is truly linear: y = 2 * X plus normal noise (sd 2)
set.seed(0)
X <- seq(0, 10, length.out = 100)
y <- 2 * X + rnorm(100, sd = 2)
# Degree-10 B-spline with interior knots at 3 and 7, fitted to the linear data
spline_model <- lm(
  y ~ bs(X, degree = 10, knots = c(3, 7)),
  data = data.frame(X, y)
)
# Predictions at the observed X values
y_pred_spline_1 <- predict(spline_model, newdata = data.frame(X))
# Residuals of the spline fit
residuals_spline_1 <- y - y_pred_spline_1
# Data frame for the residual plot
plot_data_spline_1 <- data.frame(
  X = X,
  residuals_spline_1 = residuals_spline_1
)
# Observed points with the fitted degree-10 spline curve
ggplot(data.frame(X, y, y_pred_spline_1), aes(x = X)) +
  geom_point(aes(y = y), color = "blue") +
  geom_line(aes(y = y_pred_spline_1), color = "red") +
  labs(
    title = "Spline Model Fit on Linear Data",
    x = "X",
    y = "Y"
  ) +
  theme_minimal()
# Residuals of the spline model against X
ggplot(plot_data_spline_1, aes(x = X, y = residuals_spline_1)) +
  geom_point(color = "red") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(
    title = "Residuals of Spline Model",
    x = "X",
    y = "Residuals"
  ) +
  theme_minimal()
The residual plot looks fine, but the fitted curve looks terrible: the degree-10 spline chases the noise in data that is actually linear.
When building regression models, it is important to adopt a bold and experimental mindset. Being open to incorporating new terms and considering diverse approaches is crucial. Relying solely on residual plots, or solely on the fit of the model itself, provides a limited perspective. A comprehensive approach that combines several methodologies and analyses is essential for effective modeling. Some intuition about the shapes of different functions’ graphs can help narrow the range of candidate terms, but each addition needs to be evaluated carefully, because we also need to think about interpretability and model complexity. This matters especially in the social sciences; when the goal is purely prediction, however, these concerns may be less serious. All in all, no model is perfect; a model only simplifies real-world problems or scenarios. Being critical is the key.